set.seed(51)

load(file="~/PM10/pm10_withps1990.Rda")
######## Prune observations that are out of range ###########
dat=subset(dat, outofrange==0)
dat$pscat = as.factor(dat$pscat)

######## Set the Working Directory so that output is saved in the right place ######## 
wd <- "~/PM10/20150502_big_1990/"
setwd(wd)

##############################################################
################ Specify the Array Parameters ################
##############################################################

outcomenames = c("Total_death.2001", "ALL_CVD.2001", "Respiratory.2001")  ### The outcomes that will be considered
denominators = c("Tot_Beneficiary.2001", "pyear.2001", "pyear.2001")
outfiles = c("mort", "cvd", "resp")                                       ### To append the output R object name


##############################################################
###################### Array Starts Here #####################
##############################################################
j = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID'))
print(j)

######## Choose which health outcome to use ######## 
dat$h = dat[, outcomenames[j]]                                             ### Set h (outcome variable)
dat$denom = dat[, denominators[j]]                                         ### Set denom (denominator variable)


######## Choose how to transform pollution ######## 
dat$y = log(dat$pm10fu)    #dat$fu - dat$base
dat$ytemp=dat$y
dat$ytemp[is.na(dat$ytemp)]= mean(dat$y, na.rm=TRUE)

######## Choose how to specify the models ######## 
### Big Models ###
mh0=glm(h~pscat + pm10base1990 + Median_income + HS_rate + Urban_rate + Migration_5_year_rate + Current_Smoking_rate_weighted + mean_age.2001 + Female_rate.2001 + White_rate.2001 + Black_rate.2001 + housedens + temperature + logpop +  ytemp + offset(log(denom)), 
        family=poisson(link="log"), data=subset(dat, a==0))
mh1=glm(h~pscat + pm10base1990 + Median_income + HS_rate + Urban_rate + Migration_5_year_rate + Current_Smoking_rate_weighted + mean_age.2001 + Female_rate.2001 + White_rate.2001 + Black_rate.2001 + housedens + temperature + logpop +  ytemp + offset(log(denom)), 
        family=poisson(link="log"), data=subset(dat, a==1))


library(HEIfunctions)

q=2
n=dim(dat)[[1]]
nburn= 0 #5000 the function only outputs iters > nburn
nsamp= 50000 ## nburnin + niter
thin=1
alpha0prop=summary(mh0)$cov.unscaled*1.1
alpha1prop=summary(mh1)$cov.unscaled*.9
ypropsd=rep(.75, n*q)

#### Proposals for log(pm) ######
ypropsd[seq(1,n*q,q)] = log(dat$pm10base1990_1994)*.3  
ypropsd[seq(2,n*q,q)] = log(dat$pm10base1990_1994)*.3 

#### Proposals for pmfu - pmbase ######
# ypropsd[seq(1,n*q,q)] = dat$pm10base1990_1994*.5
# ypropsd[seq(2,n*q,q)] = dat$pm10base1990_1994*.5 

coords   	<- cbind(dat$Longitude, dat$Latitude)
phistart 	<- makephi(coords, 10)
philower 	<- makephi(coords, 4) 
phiupper 	<- makephi(coords, 30)

tuning = list(A = 0.1, psi = 0.2, theta = 1, alpha0=.09*alpha0prop, alpha1=.09*alpha1prop, Y=ypropsd)
prior = list(KIG = rep(.5,2), psi = rep(.5,2), theta1 = rep(philower,2), theta2 = rep(phiupper,2))
starting = list(B=NULL, A = NULL, psi = NULL, theta = phistart, alpha0=mh0$coef, alpha1=mh1$coef)


pollutionformula = as.formula( paste("y ~", paste(all.vars(mh0$formula)[!(all.vars(mh0$formula) %in% c("h", "ytemp", "denom"))], collapse="+"))  )
healthformula = as.formula( paste("h ~", paste(all.vars(mh0$formula)[!(all.vars(mh0$formula) %in% c("h", "ytemp", "denom"))], collapse="+"))  )

starttime=proc.time()
pstratamod=principalstratmod(
formula = pollutionformula, data=dat, trt="a", coords, 
formula_h = healthformula, denom="denom", nsamp, thin, tuning, prior, starting,
outputbinsize = 500, outputfilename = paste("pstratamod_temp_", outfiles[j], ".Rda", sep=""))
rtime=proc.time() - starttime
save(pstratamod, file = paste("pstratamod_", outfiles[j], ".Rda", sep=""))

